{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4",
      "authorship_tag": "ABX9TyOZhPcZe61RhDjhEFQv0vrl",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/liltom-eth/llama2-webui/blob/main/colab/webui_CodeLlama_7B_Instruct_GPTQ.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "7O5JSosg5-rx"
      },
      "outputs": [],
      "source": [
        "# Install the pinned llama2-wrapper release; %pip (not !pip) targets the running kernel's environment.\n",
        "%pip install -U llama2-wrapper==0.1.12"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Y6A7bJdkmzY8"
      },
      "outputs": [],
      "source": [
        "# Fetch the web UI sources. The clone is skipped when a checkout already exists,\n",
        "# so re-running this cell no longer aborts with 'destination path already exists'.\n",
        "%cd /content\n",
        "!test -d llama2-webui/.git || git clone https://github.com/liltom-eth/llama2-webui\n",
        "\n",
        "# Download the GPTQ weights; the downloader writes them to ./models/CodeLlama-7B-Instruct-GPTQ\n",
        "# relative to the current directory, which is the path passed to app.py below.\n",
        "%cd /content/llama2-webui\n",
        "!python -m llama2_wrapper.download --repo_id TheBloke/CodeLlama-7B-Instruct-GPTQ\n",
        "\n",
        "# Start the Gradio app. --share True also publishes a temporary public gradio.live URL.\n",
        "# The cell blocks until the server is interrupted (Runtime > Interrupt execution).\n",
        "!python app.py --backend_type gptq --model_path ./models/CodeLlama-7B-Instruct-GPTQ/ --share True"
      ]
    }
  ]
}